{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Layer(object):\n",
    "    def __init__(self,name):\n",
    "        self.name = name\n",
    "        self.params,self.grads = None,None\n",
    "    def forward(self,inputs):\n",
    "        raise NotImplementedError\n",
    "    def backward(self,grad):\n",
    "        raise NotImplementedError"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "def ZerosInit(*args):\n",
    "    print(args)\n",
    "    return np.zeros(args[0])\n",
    "\n",
    "def RandomInit(*args):\n",
    "    print(args)\n",
    "    return np.random.randn(args[0][0],args[0][1])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.09211359, -1.89176291],\n",
       "       [ 0.33310465, -1.32088441]])"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.randn(1,2)\n",
    "np.random.randn(2,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Dense(Layer):\n",
    "    def __init__(self,num_in,num_out,w_init=RandomInit,b_init=ZerosInit):\n",
    "        # 在layer中层的名字定义为Linear\n",
    "        super().__init__(\"Linear\")\n",
    "        self.params={\n",
    "            \"w\":w_init([num_in,num_out]),\n",
    "            \"b\":b_init([1,num_out])\n",
    "        }\n",
    "        self.inputs = None\n",
    "    \n",
    "    def forward(self,inputs):\n",
    "        self.inputs = inputs\n",
    "        return inputs * self.params[\"w\"]+self.params[\"b\"]\n",
    "    \n",
    "    def backward(self,grad):\n",
    "        # Doutput/Dw  当前层w的梯度\n",
    "        self.grads[\"w\"] = self.inputs * grad\n",
    "        # Doutput/Db，当前层b的梯度\n",
    "        self.grads[\"b\"] = np.sum(grad,axis=0)\n",
    "        # Doutput/Dx,用于链式规则的后续部分求导\n",
    "        return grad * self.params[\"w\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "([2, 2],)\n",
      "([10, 10],)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(array([[-0.4177726 ,  1.05959241],\n",
       "        [ 0.44516585,  1.14050681]]),\n",
       " array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
       "        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]))"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "RandomInit([2,2]),ZerosInit([10,10])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Activation(Layer):\n",
    "    # base activation layer\n",
    "    def __init__(self,name):\n",
    "        super().__init__(name)\n",
    "        self.inputs = None\n",
    "        \n",
    "    def forward(self,inputs):\n",
    "        self.inputs = inputs\n",
    "        return self.func(inputs)\n",
    "    def backward(self,grad):\n",
    "        return self.derivative_func(self.inputs) * grad\n",
    "    \n",
    "    def func(self,x):\n",
    "        raise NotImplementedError\n",
    "    def derivative_func(self,x):\n",
    "        raise NotImplementedError"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class ReLU(Activation):\n",
    "    # ReLU activation function\n",
    "    def __init__(self):\n",
    "        super().__init__(\"ReLU\")\n",
    "    def func(self,x):\n",
    "        # relu激活函数的前向计算过程\n",
    "        return np.maximum(x,0.0)\n",
    "    def derivative_func(self,x):\n",
    "        # drelu/dx\n",
    "        # 当x>0时，drelu/dx=1 true\n",
    "        # 当x<=0时，drelu/dx=0 false\n",
    "        return x > 0.0\n",
    "class Sigmoid(Activation):\n",
    "    # sigmoid activation function\n",
    "    def __init__(self):\n",
    "        super().__init__(\"Sigmoid\")\n",
    "    def func(self,x):\n",
    "        # sigmoid\n",
    "        return 1/(1+np.exp(-x))\n",
    "    def derivative_func(self,x):\n",
    "        y = self.func(x)\n",
    "        return y * (1-y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "([2, 4],)\n",
      "([1, 4],)\n",
      "([4, 1],)\n",
      "([1, 1],)\n"
     ]
    }
   ],
   "source": [
    "layer1 = Dense(2,4)\n",
    "layer2 = Sigmoid()\n",
    "layer3 = Dense(4,1)\n",
    "\n",
    "x1=[0.9,0.1]\n",
    "x2=[0.8,0.2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "operands could not be broadcast together with shapes (2,) (2,4) ",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-112-d9975756e532>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-106-bf437b51033e>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m     11\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"w\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"b\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     15\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,) (2,4) "
     ]
    }
   ],
   "source": [
    "x=layer1.forward(x1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "x=layer2.forward(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "x=layer3.forward(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.18265831],\n",
       "       [ 0.60634778],\n",
       "       [ 0.76488293],\n",
       "       [-0.348134  ]])"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def model(x):\n",
    "    x=layer1.forward(x)\n",
    "    x=layer2.forward(x)\n",
    "    x=layer3.forward(x)\n",
    "    return x\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "operands could not be broadcast together with shapes (2,) (2,4) ",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-117-b56c917667e5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-116-d68ed9c17615>\u001b[0m in \u001b[0;36mmodel\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlayer3\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-106-bf437b51033e>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m     11\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"w\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"b\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     15\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (2,) (2,4) "
     ]
    }
   ],
   "source": [
    "model(x1),model(x1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "unsupported operand type(s) for @: 'list' and 'int'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-105-e20a8266ec6e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-94-64e0a5a69138>\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, grad)\u001b[0m\n\u001b[1;32m     15\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     16\u001b[0m         \u001b[0;31m# Doutput/Dw  当前层w的梯度\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrads\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"w\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minputs\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mgrad\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     18\u001b[0m         \u001b[0;31m# Doutput/Db，当前层b的梯度\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrads\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"b\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for @: 'list' and 'int'"
     ]
    }
   ],
   "source": [
    "layer1.backward(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'Dense' object has no attribute 'grad'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-91-1c8034944858>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayer1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m: 'Dense' object has no attribute 'grad'"
     ]
    }
   ],
   "source": [
    "layer1.name,layer1.params,layer1.grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Sigmoid'"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer2.name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'b': array([[0.]]), 'w': array([[-0.58958781],\n",
       "        [-0.27086268],\n",
       "        [-0.15791097],\n",
       "        [-0.37147295]])}"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "layer3.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:python3.6.7]",
   "language": "python",
   "name": "conda-env-python3.6.7-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
